## Load libraries


suppressPackageStartupMessages({
library(ArchR)
library(rhdf5)
library(tidyverse)
library(reticulate)
library(zellkonverter)
library(Matrix)
library(dichromat)
library(Seurat)
#library(caret)
h5disableFileLocking()})
rna_seurat <- readRDS("Seurat_objects/rna_Seurat_object")
hvg <- VariableFeatures(rna_seurat)
# directory where to save the figures
plot_dir <- "/omics/groups/OE0533/internal/katharina/scDoRI/gastrulation_data_new/Kathi/plots/report_chromvar/"
# get gene expression matrix
proj <- loadArchRProject("12_Ricards_peaks_ChromVar")


# get the metadata 
metadata <- as.data.frame(getCellColData(proj))

ChromVar Motif deviation scores


# get motif matrix
motifs <- getMatrixFromProject(proj, useMatrix = "MotifMatrix")
motif_mtx <- assays(motifs)[[2]]
# remove index number from TFs
tfs <- gsub("_.*", "", rownames(motifs))
rownames(motif_mtx) <- tfs

Gene activity scores

gene_scores <- getMatrixFromProject(proj, useMatrix = "GeneScoreMatrix")
gene_scores_mat <- assays(gene_scores)[[1]]
rownames(gene_scores_mat) <- rowData(gene_scores)$name
colnames(gene_scores_mat) <- colnames(gene_scores)

Deep Learning deviations

proj1 <- loadArchRProject("/omics/groups/OE0533/internal/katharina/scDoRI/gastrulation_data_new/Kathi/06_deep_chromvar/")
# get motif matrix
deep_motifs <- getMatrixFromProject(proj1, useMatrix = "DeepLearningMotifs1")
deep_motif_mtx <- assays(deep_motifs)[[2]]

rownames(deep_motif_mtx) <- tolower(rownames(deep_motif_mtx))
substr(rownames(deep_motif_mtx), 1, 1) <- toupper(substr(rownames(deep_motif_mtx), 1, 1))


tfs <- rownames(deep_motif_mtx)
metadata <- colData(deep_motifs) %>% as.data.frame()

Plotting Functions

plot_score_per_celltypes <- function(tf, score_matrix, metadata_df, y_label){
  motif_n <- score_matrix[rownames(score_matrix) %in% tf, ]
  plot <- metadata_df %>% 
    mutate(!!tf := motif_n) %>%
    group_by(celltypes) %>%
    summarise(mean = mean(!!(sym(tf)))) %>%
    ggplot() +
    geom_bar(aes(x = celltypes, y = mean, fill = celltypes), stat = "identity") +
    scale_fill_manual(values = col) +
    labs(y = paste0(y_label),
         title = paste0(n)) +
    BAR_THEME
  
  return(plot)
}

scatterplot <- function(tf, score_matrix_x, score_matrix_y, metadata_df, x_label, y_label){
  motif_x <- score_matrix_x[rownames(score_matrix_x) %in% tf, ]
  motif_y <- score_matrix_y[rownames(score_matrix_y) %in% tf, ]
  plot <- metadata_df %>% 
    mutate(tf_x := motif_x, tf_y := motif_y) %>%
    group_by(celltypes) %>%
    summarise_at(vars(tf_x, tf_y), mean) %>% 
    ggplot() +
    geom_smooth(aes(x = tf_x, y = tf_y),
                formula = y ~ x, method = "lm", size =.1) +
    geom_point(aes(x = tf_x, y = tf_y, col = celltypes, size = 1)) +
    scale_color_manual(values = col) + 
    labs(title = paste0(tf), 
         x = paste0(x_label),
         y = paste0(y_label)) +
    SCATTER_THEME
    
  return(plot)
}

Plot Themes

BAR_THEME <- theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1, size = 10),
                   axis.text.y = element_text(size = 12),
                   axis.title.x = element_text(size = 15), 
                   axis.title.y = element_text(size = 15),
                   plot.title = element_text(hjust = 0.5, size = 20),
          legend.position = "none",
          panel.grid.major = element_line(colour = "grey"),   # Major grid lines
          panel.background = element_rect(fill = "white", colour = "black")) 

SCATTER_THEME <- theme(axis.text.x = element_text(size = 12),
                   axis.text.y = element_text(size = 12),
                   axis.title.x = element_text(size = 15), 
                   axis.title.y = element_text(size = 15),
                   plot.title = element_text(hjust = 0.5, size = 20),
          legend.position = "none",
          panel.grid.major = element_line(colour = "grey"),   # Major grid lines
          panel.background = element_rect(fill = "white", colour = "black")) 

Color Palette:

colPalette_celltypes = c('#532C8A',
 '#c19f70',
 '#f9decf',
 '#c9a997',
 '#B51D8D',
 '#3F84AA',
 '#9e6762',
 '#354E23',
 '#F397C0',
 '#ff891c',
 '#635547',
 '#C72228',
 '#f79083',
 '#EF4E22',
 '#989898',
 '#7F6874',
 '#8870ad',
 '#647a4f',
 '#EF5A9D',
 '#FBBE92',
 '#139992',
 '#cc7818',
 '#DFCDE4',
 '#8EC792',
 '#C594BF',
 '#C3C388',
 '#0F4A9C',
 '#FACB12',
 '#8DB5CE',
 '#1A1A1A',
 '#C9EBFB',
 '#DABE99',
 '#65A83E',
 '#005579',
 '#CDE088',
 '#f7f79e',
 '#F6BFCB')

celltypes <- (as.data.frame(getCellColData(proj)) %>% group_by(celltypes) %>% 
 summarise(n = n()))$celltypes

col <- setNames(colPalette_celltypes, celltypes)

Select a few marker genes and transcription factors:

marker_genes <- c("Lamb1",  "Sparc", "Elf5", "Ascl2", "Tfap2c", "Ttr",
                  "Apoa2", "Apoe", "Cystm1", "Emb", "Spink1",  "Krt19",
                  "Dkk1", "Grhl3", "Trp63", "Grhl2",  "Pax6", "Pax2",
                  "En1", "Foxd3", "Tfap2a", "Pax3", "Sox9",
                  "Six3", "Hesx1", "Irx3", "Hoxb9", "Cdx4",
                  "Hes3", "Hba-a2", "Hba-a1",  "Hbb-bh1", "Gata1",
                   "Gata6",
                  #"Gata6", "Gata5",
                  "Cited4",
                   "Cdh5", "Pecam1", "Anxa5", "Etv2", "Igf2",
                  "Krt8", "Krt18", "Pmp22", "Ahnak", "Bmp4", "Tbx4", "Hoxa11",
                  "Hoxa10", "Tnnt2", "Myl4",  "Myl7", "Acta2",
                  "Smarcd3", "Tcf21", "Tbx6", "Dll1", "Aldh1a2", "Tcf15",
                  "Meox1", "Tbx1", "Gbx2", "Cdx1", "Hoxb1", "Hes7", "Osr1",
                  "Mesp2", "Lefty2", "Mesp1", "Cer1",  "Chrd", 
                  "Foxa2", "Pax7", "Fgf8", "Lhx1", "Mixl1", "Otx2", "Hhex",
                   "Ifitm3", "Nkx1-2", "Eomes", "Nanog", "Utf1",
                  "Epcam", "Pou5f1" )
#"Sox2"
#"Gata2"
#"Gata4"
#  "Gata5",
#"Gata6",
# "Gsc",

Deep Learning motifs vs. cisbp motifs


markers <- c("Elf5", "Pax2", "Pax6", "Pax3", "Pax7", "Hoxb9", "Cdx4", "Hoxa11", "Hoxa10", "Tcf15", 
  "Tbx1", "Tbx6", "Mesp2", "Mesp1", "Pouf51", "Gata1", "Gata2", "Gata3", "Gata4",  "Gata6", "Sox10",
  "Sox11" ,"Sox13","Sox15","Sox17"      
  ,"Sox2" , "Sox3", "Sox30", "Sox4", "Sox5", "Sox6", "Sox9", "Klf1", "Klf3", "Klf4", "Klf5", "Klf9" )

# select only markers present in all three matrices 
markers <- markers[markers %in% rownames(deep_motif_mtx)]
markers <- markers[markers %in% rownames(motif_mtx)]
markers <- markers[markers %in% rownames(gene_scores_mat)]



for (n in markers){
  ## BAR PLOTS
  score_plot <- plot_score_per_celltypes(n, gene_scores_mat, metadata,
                                         y_label = "Gene activity score")
  ggsave(paste0(plot_dir, "score", n, ".pdf"))
  
  
  motif_plot <- plot_score_per_celltypes(n, motif_mtx, metadata, 
                                         y_label = "ChromVar scores")
  ggsave(paste0(plot_dir, "motif", n, ".pdf"))
  
  
  deep_plot <- plot_score_per_celltypes(n, deep_motif_mtx, metadata,
                                        y_label = "Deep learning ChromVar scores")
  ggsave(paste0(plot_dir, "deep", n, ".pdf"), deep_plot)
  
  
  ## SCATTER PLOTS
  scatter_motif <- scatterplot(n, gene_scores_mat, motif_mtx, metadata, 
              x_label = "Gene activity scores",
              y_label = "ChromVar scores")
  ggsave(paste0(plot_dir, "scatter_motif", n, ".pdf"), scatter_motif)

  
  scatter_deep <- scatterplot(n, gene_scores_mat, deep_motif_mtx, metadata,
              x_label = "Gene activity scores",
              y_label = "Deep learning ChromVar scores")
  ggsave(paste0(plot_dir, "scatter_deep", n, ".pdf"), scatter_deep)


  ## Combine Plots
  print(cowplot::plot_grid(score_plot, motif_plot, deep_plot,
                           scatter_motif, scatter_deep, ncol = 3))
  
  
}

ChromVar scores using cisbp


markers <- c("Lamb1",  "Sparc", "Elf5", "Ascl2", "Tfap2c", "Ttr",
                  "Apoa2", "Apoe", "Cystm1", "Emb", "Spink1",  "Krt19",
                  "Dkk1", "Grhl3", "Trp63", "Grhl2",  "Pax6", "Pax2",
                  "En1", "Foxd3", "Tfap2a", "Pax3", "Sox9",
                  "Six3", "Hesx1", "Irx3", "Hoxb9", "Cdx4",
                  "Hes3", "Hba-a2", "Hba-a1",  "Hbb-bh1", "Gata1",
                   "Gata6",
                  #"Gata6", "Gata5",
                  "Cited4",
                   "Cdh5", "Pecam1", "Anxa5", "Etv2", "Igf2",
                  "Krt8", "Krt18", "Pmp22", "Ahnak", "Bmp4", "Tbx4", "Hoxa11",
                  "Hoxa10", "Tnnt2", "Myl4",  "Myl7", "Acta2",
                  "Smarcd3", "Tcf21", "Tbx6", "Dll1", "Aldh1a2", "Tcf15",
                  "Meox1", "Tbx1", "Gbx2", "Cdx1", "Hoxb1", "Hes7", "Osr1",
                  "Mesp2", "Lefty2", "Mesp1", "Cer1",  "Chrd", 
                  "Foxa2", "Pax7", "Fgf8", "Lhx1", "Mixl1", "Otx2", "Hhex",
                   "Ifitm3", "Nkx1-2", "Eomes", "Nanog", "Utf1",
                  "Epcam", "Pou5f1" )

# select only markers prsent in both matrices
markers <- markers[markers %in% rownames(motif_mtx)]
markers <- markers[markers %in% rownames(gene_scores_mat)]

for (n in markers){
  ## BAR PLOTS
  score_plot <- plot_score_per_celltypes(n, gene_scores_mat, metadata,
                                         y_label = "Gene activity score")

  
  motif_plot <- plot_score_per_celltypes(n, motif_mtx, metadata, 
                                         y_label = "ChromVar scores")

  
  
  ## SCATTER PLOTS
  scatter_motif <- scatterplot(n, gene_scores_mat, motif_mtx, metadata, 
              x_label = "Gene activity scores",
              y_label = "ChromVar scores")


  ## Combine Plots
  print(cowplot::plot_grid(score_plot, motif_plot,
                           scatter_motif, ncol = 2))
  
  
}

GATA Factors

```#{r, fig.width=6, fig.height=6} for (n in c(“Gata1”, “Gata2”, “Gata3”, “Gata4”, “Gata5”, “Gata6”)) { print(n) # select one row for a particular gene score_n <- gene_scores_mat[rownames(gene_scores_mat) %in% n, ] # add score for this gene to metadata metadata[paste0(“score_”,n)] <- score_n # select gene expression for a particular gene expr_n <- lognorm[rownames(lognorm) %in% c(n), ] metadata[paste0(“expr_”, n)] <- expr_n

seacells_n <- sea_mtx[rownames(sea_mtx) == tfs[grepl(paste0(“^”, n), tfs)], ] sea_meta[paste0(“seacell_”, n)] <- seacells_n

# select motif z score motif_n <- motif_mtx[rownames(motif_mtx) == tfs[grepl(paste0(“^”, n), tfs)], ] metadata[paste0(“motif_”, n)] <- motif_n

# create barplots for gene scores, gene expression and motif z-score plots <- map(c(“score_”, “motif_”), function(p){ df <- metadata %>% group_by(celltypes) %>% summarise_at(vars(paste0(p, n)), funs(mean(., na.rm=TRUE))) plot <- df %>% ggplot() + geom_bar(aes(x = celltypes, y = df %>% pull(paste0(p, n)), fill = celltypes), stat = “identity”) + scale_fill_manual(values = col) + labs(title = paste0(n), x = “celltype”, y = paste0(p)) + BAR_THEME print(plot) ggsave(paste0(plot_dir, p, n, “.pdf”)) print(plot) }) sea_plot <- map(seq.int(1), function(sea){ plot <- sea_meta %>% group_by(celltypes) %>% summarise(mean = mean(!!sym(paste0(“seacell_”,n)))) %>% ggplot() + geom_bar(aes(x = celltypes, y = mean, fill = celltypes), stat = “identity”) + scale_fill_manual(values = col) + labs(title = paste0(n), y = “SEACell deviation score”) + BAR_THEME print(plot) ggsave(paste0(plot_dir, “seacell_bar_”, n, “.pdf”)) print(plot)

}) # create scatter plots for gene expression and motif z-score scatter_plots <- map(seq.int(1), function(s){ df <- metadata %>% group_by(celltypes) %>% summarise_at(vars(paste0(“motif_”, n), paste0(“score_”, n)), funs(mean(., na.rm = TRUE))) plot <- df %>% ggplot() + geom_smooth(aes(x = df %>% pull(paste0(“score_”, n)), y = df %>% pull(paste0(“motif_”, n))), formula = y ~ x, method = “lm”, size = .1) + geom_point(aes(x = df %>% pull(paste0(“score_”, n)), y = df %>% pull(paste0(“motif_”, n)), col = celltypes, size = 1)) + SCATTER_THEME + #labs(x = “Gata1 gene expression”, y = “Gata1 motif accessibility (z-score)”) + scale_color_manual(values = col) + labs(title = paste0(n), x = “gene activity score”, y = “TF-motif z-score”) print(plot) ggsave(paste0(plot_dir, “scatterPlot_”, n, “.pdf”)) print(plot) })

do.call(gridExtra::grid.arrange, c(plots, sea_plot, scatter_plots, ncol = 2)) #%>% ggsave(paste0(plot_dir, n, “.pdf”))

}


# Marker Genes

## SEACells


```#{r, fig.width=6, fig.height=6, eval = FALSE}

for (n in marker_genes) {
  print(n)
  # select one row for a particular gene
  score_n <- gene_scores_mat[rownames(gene_scores_mat) %in% c(n), ]
  # add score for this gene to metadata
  metadata[paste0("score_",n)] <- score_n
  # select gene expression for a particular gene
  expr_n <- lognorm[rownames(lognorm) %in% c(n), ]
  metadata[paste0("expr_", n)] <- expr_n
  
  
  seacells_n <- sea_mtx[rownames(sea_mtx) == tfs[grepl(paste0("^", n), tfs)], ]
  sea_meta[paste0("seacell_", n)] <- seacells_n

  # if the marker gene is a TF
  if (length(tfs[grepl(paste0("^", n), tfs)]) > 0) {
    
      # select motif z score
      motif_n <- motif_mtx[rownames(motif_mtx) ==  tfs[grepl(paste0("^", n), tfs)], ]
      metadata[paste0("motif_", n)] <- motif_n
      
      # create barplots for gene scores, gene expression and motif z-score
      plot <- map(c("score_", "motif_"), function(p){
        df <- metadata %>%
          group_by(celltypes) %>%
          summarise_at(vars(paste0(p, n)), funs(mean(., na.rm=TRUE)))
          plot <- df %>% ggplot() +
          geom_bar(aes(x = celltypes, y = df %>% pull(paste0(p, n)), 
                       fill = celltypes), stat = "identity") +
          scale_fill_manual(values = col) +
          theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
                legend.position = "none") +
          labs(title = paste0(n), x = "celltype", y = paste0(p)) +
          BAR_THEME
        print(plot)
        ggsave(paste0(plot_dir, p, n, ".pdf"))
        print(plot)
      })
      
      # create scatter plots for gene expression and motif z-score
      scatter_plots <- map(seq.int(1), function(s){
        df <- metadata %>% group_by(celltypes) %>% 
        summarise_at(vars(paste0("motif_", n), 
                          paste0("score_", n)), 
                     funs(mean(., na.rm = TRUE)))
        plot <- df %>%
          ggplot() +
          geom_smooth(aes(x = df %>% pull(paste0("score_", n)), 
                          y = df %>% pull(paste0("motif_", n))),
                      formula = y ~ x, method = "lm", size = .1) +
          geom_point(aes(x = df %>% pull(paste0("score_", n)), 
                         y = df %>% pull(paste0("motif_", n)), 
                         col = celltypes, size = 1)) +
          #labs(x = "Gata1 gene expression", y = "Gata1 motif accessibility (z-score)") +
          theme(legend.position = "none") +
          scale_color_manual(values = col) +
          labs(title = paste0(n), x = "gene activity score", y = "TF-motif z-score") +
          BAR_THEME
        print(plot)
        ggsave(paste0(plot_dir, "scatterPlot_", n, ".pdf"))
        print(plot)
      })
        sea_plot <- map(seq.int(1), function(sea){
          plot <- sea_meta %>% 
            group_by(celltypes) %>%
            summarise(mean = mean(!!sym(paste0("seacell_",n)))) %>%
            ggplot() +
            geom_bar(aes(x = celltypes, y = mean, fill = celltypes), stat = "identity") +
            scale_fill_manual(values = col) +
            theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1), legend.position = "None") +#%>% print()
            labs(title = paste0(n), y = "SEACell deviation score") +
            BAR_THEME
          print(plot)
          ggsave(paste0(plot_dir, "seacell_bar_", n, ".pdf"))
          print(plot)
          
    })
      
    # if the marker gene is no TF
    } else {
      print("no")
      
      # # create barplots for gene scores, gene expression and motif z-score
      # plots <- map(c("score_"), function(p){
      #   df <- metadata %>%
      #     group_by(celltypes) %>%
      #     summarise_at(vars(paste0(p, n)), funs(mean(., na.rm=TRUE)))
      #     plot <- df %>% ggplot() +
      #     geom_bar(aes(x = celltypes, y = df %>% pull(paste0(p, n)), 
      #                  fill = celltypes), stat = "identity") +
      #     scale_fill_manual(values = col) +
      #     theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
      #           legend.position = "none") +
      #     labs(title = paste0(n), x = "celltype", y = paste0(p)) +
      #     BAR_THEME
      #   print(plot)
      #   ggsave(paste0(plot_dir, p, n, ".pdf"))
      #   print(plot)
      # 
      # })
      # 
      # # create scatter plots for gene expression and gene_score
      # scatter_plots <- map(seq.int(1), function(s){
      #   df <- metadata %>% group_by(celltypes) %>%
      #   summarise_at(vars(paste0("score_", n),
      #                     paste0("expr_", n)),
      #                funs(mean(., na.rm = TRUE)))
      #   plot <- df %>%
      #     ggplot() +
      #     geom_smooth(aes(x = df %>% pull(paste0("expr_", n)),
      #                     y = df %>% pull(paste0("score_", n))),
      #                 formula = y ~ x, method = "lm", size = .1) +
      #     geom_point(aes(x = df %>% pull(paste0("expr_", n)),
      #                    y = df %>% pull(paste0("score_", n)),
      #                    col = celltypes, size = 1)) +
      #     #labs(x = "Gata1 gene expression", y = "Gata1 motif accessibility (z-score)") +
      #     theme(legend.position = "none") +
      #     scale_color_manual(values = col)+
      #     labs(title = paste0(n), x = "gene expression", y = "gene activity score") +
      #     BAR_THEME
      #   print(plot)
      #   ggsave(paste0(plot_dir, "scatterPlot_", n, ".pdf"))
      #   print(plot)
      #})
    }

  #do.call(gridExtra::grid.arrange, c(plots, scatter_plots, ncol = 2, nrow = 2))
  # 

}

Deep learning motifs

proj1 <- loadArchRProject("/omics/groups/OE0533/internal/katharina/scDoRI/gastrulation_data_new/Kathi/06_deep_chromvar/")

deep <- getMatrixFromProject(proj1, useMatrix = "DeepLearningMotifs1")
deep_matrix <- assays(deep)[[2]]

deep_matrix <- deep_matrix[, colnames(motif_mtx)]

stopifnot(all(colnames(motif_mtx) == colnames(deep_matrix)))
#test <- rownames(deep_matrix)
rownames(deep_matrix) <- tolower(rownames(deep_matrix))
substr(rownames(deep_matrix), 1, 1) <- toupper(substr(rownames(deep_matrix), 1, 1))

TFs of interest

markers <- c("Elf5", "Pax2", "Pax6", "Pax3", "Pax7", "Hoxb9", "Cdx4", "Hoxa11", "Hoxa10", "Tcf15", 
  "Tbx1", "Tbx6", "Mesp2", "Mesp1", "Pouf51", "Gata1", "Gata2", "Gata3", "Gata4",  "Gata6", "Sox10",
  "Sox11" ,"Sox13","Sox15","Sox17"      
  ,"Sox2" , "Sox3", "Sox30", "Sox4", "Sox5", "Sox6", "Sox9", "Klf1", "Klf3", "Klf4", "Klf5", "Klf9" )

markers <- markers[markers %in% rownames(deep_matrix)]

markers <- markers[markers %in% rownames(gene_scores_mat)]
markers

```#{r, fig.width=6, fig.height=6} plot_dir <- “/omics/groups/OE0533/internal/katharina/scDoRI/gastrulation_data_new/Kathi/plots/deep_ChromVar/”

for (n in markers) { print(n) # select one row for a particular gene score_n <- gene_scores_mat[rownames(gene_scores_mat) %in% n, ] # add score for this gene to metadata metadata[paste0(“score_”,n)] <- score_n # select gene expression for a particular gene # expr_n <- lognorm[rownames(lognorm) %in% c(n), ] # metadata[paste0(“expr_”, n)] <- expr_n

# seacells_n <- sea_mtx[rownames(sea_mtx) == tfs[grepl(paste0(“^”, n), tfs)], ] # sea_meta[paste0(“seacell_”, n)] <- seacells_n

# select motif z score deep_n <- deep_matrix[rownames(deep_matrix) %in% n, ] metadata[paste0(“deep_”, n)] <- deep_n

# create barplots for gene scores, gene expression and motif z-score plots <- map(c(“score_”, “deep_”), function(p){ df <- metadata %>% group_by(celltypes) %>% summarise_at(vars(paste0(p, n)), funs(mean(., na.rm=TRUE))) plot <- df %>% ggplot() + geom_bar(aes(x = celltypes, y = df %>% pull(paste0(p, n)), fill = celltypes), stat = “identity”) + scale_fill_manual(values = col) + labs(title = paste0(n), x = “celltype”, y = paste0(p)) + BAR_THEME print(plot) ggsave(paste0(plot_dir, p, n, “.pdf”)) print(plot) }) scatter_plots <- map(seq.int(1), function(s){ df <- metadata %>% group_by(celltypes) %>% summarise_at(vars(paste0(“deep_”, n), paste0(“score_”, n)), funs(mean(., na.rm = TRUE))) plot <- df %>% ggplot() + geom_smooth(aes(x = df %>% pull(paste0(“score_”, n)), y = df %>% pull(paste0(“deep_”, n))), formula = y ~ x, method = “lm”, size = .1) + geom_point(aes(x = df %>% pull(paste0(“score_”, n)), y = df %>% pull(paste0(“deep_”, n)), col = celltypes, size = 1)) + SCATTER_THEME + #labs(x = “Gata1 gene expression”, y = “Gata1 motif accessibility (z-score)”) + scale_color_manual(values = col) + labs(title = paste0(n), x = “gene activity score”, y = “TF-motif z-score (deep learning)”) print(plot) ggsave(paste0(plot_dir, “scatterPlot_”, n, “.pdf”)) print(plot) })

do.call(gridExtra::grid.arrange, c(plots, sea_plot, scatter_plots, ncol = 2)) #%>% ggsave(paste0(plot_dir, n, “.pdf”))

}



```#{r}
for (n in marker_genes) {
  print(n)
  # select one row for a particular gene
  score_n <- gene_scores_mat[rownames(gene_scores_mat) %in% c(n), ]
  # add score for this gene to metadata
  metadata[paste0("score_",n)] <- score_n
  # select gene expression for a particular gene
  expr_n <- lognorm[rownames(lognorm) %in% c(n), ]
  metadata[paste0("expr_", n)] <- expr_n
  
  
  seacells_n <- sea_mtx[rownames(sea_mtx) == tfs[grepl(paste0("^", n), tfs)], ]
  sea_meta[paste0("seacell_", n)] <- seacells_n

  # if the marker gene is a TF
  if (length(tfs[grepl(paste0("^", n), tfs)]) > 0) {
    
      # select motif z score
      motif_n <- motif_mtx[rownames(motif_mtx) ==  tfs[grepl(paste0("^", n), tfs)], ]
      metadata[paste0("motif_", n)] <- motif_n
      
      # create barplots for gene scores, gene expression and motif z-score
      plot <- map(c("score_", "motif_"), function(p){
        df <- metadata %>%
          group_by(celltypes) %>%
          summarise_at(vars(paste0(p, n)), funs(mean(., na.rm=TRUE)))
          plot <- df %>% ggplot() +
          geom_bar(aes(x = celltypes, y = df %>% pull(paste0(p, n)), 
                       fill = celltypes), stat = "identity") +
          scale_fill_manual(values = col) +
          theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
                legend.position = "none") +
          labs(title = paste0(n), x = "celltype", y = paste0(p)) +
          BAR_THEME
        print(plot)
        ggsave(paste0(plot_dir, p, n, ".pdf"))
        print(plot)
      })
      
      # create scatter plots for gene expression and motif z-score
      scatter_plots <- map(seq.int(1), function(s){
        df <- metadata %>% group_by(celltypes) %>% 
        summarise_at(vars(paste0("motif_", n), 
                          paste0("score_", n)), 
                     funs(mean(., na.rm = TRUE)))
        plot <- df %>%
          ggplot() +
          geom_smooth(aes(x = df %>% pull(paste0("score_", n)), 
                          y = df %>% pull(paste0("motif_", n))),
                      formula = y ~ x, method = "lm", size = .1) +
          geom_point(aes(x = df %>% pull(paste0("score_", n)), 
                         y = df %>% pull(paste0("motif_", n)), 
                         col = celltypes, size = 1)) +
          #labs(x = "Gata1 gene expression", y = "Gata1 motif accessibility (z-score)") +
          theme(legend.position = "none") +
          scale_color_manual(values = col) +
          labs(title = paste0(n), x = "gene activity score", y = "TF-motif z-score") +
          BAR_THEME
        print(plot)
        ggsave(paste0(plot_dir, "scatterPlot_", n, ".pdf"))
        print(plot)
      })
        sea_plot <- map(seq.int(1), function(sea){
          plot <- sea_meta %>% 
            group_by(celltypes) %>%
            summarise(mean = mean(!!sym(paste0("seacell_",n)))) %>%
            ggplot() +
            geom_bar(aes(x = celltypes, y = mean, fill = celltypes), stat = "identity") +
            scale_fill_manual(values = col) +
            theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1), legend.position = "None") +#%>% print()
            labs(title = paste0(n), y = "SEACell deviation score") +
            BAR_THEME
          print(plot)
          ggsave(paste0(plot_dir, "seacell_bar_", n, ".pdf"))
          print(plot)
          
    })
      
    # if the marker gene is no TF
    } else {
      print("no")
      
      # # create barplots for gene scores, gene expression and motif z-score
      # plots <- map(c("score_"), function(p){
      #   df <- metadata %>%
      #     group_by(celltypes) %>%
      #     summarise_at(vars(paste0(p, n)), funs(mean(., na.rm=TRUE)))
      #     plot <- df %>% ggplot() +
      #     geom_bar(aes(x = celltypes, y = df %>% pull(paste0(p, n)), 
      #                  fill = celltypes), stat = "identity") +
      #     scale_fill_manual(values = col) +
      #     theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
      #           legend.position = "none") +
      #     labs(title = paste0(n), x = "celltype", y = paste0(p)) +
      #     BAR_THEME
      #   print(plot)
      #   ggsave(paste0(plot_dir, p, n, ".pdf"))
      #   print(plot)
      # 
      # })
      # 
      # # create scatter plots for gene expression and gene_score
      # scatter_plots <- map(seq.int(1), function(s){
      #   df <- metadata %>% group_by(celltypes) %>%
      #   summarise_at(vars(paste0("score_", n),
      #                     paste0("expr_", n)),
      #                funs(mean(., na.rm = TRUE)))
      #   plot <- df %>%
      #     ggplot() +
      #     geom_smooth(aes(x = df %>% pull(paste0("expr_", n)),
      #                     y = df %>% pull(paste0("score_", n))),
      #                 formula = y ~ x, method = "lm", size = .1) +
      #     geom_point(aes(x = df %>% pull(paste0("expr_", n)),
      #                    y = df %>% pull(paste0("score_", n)),
      #                    col = celltypes, size = 1)) +
      #     #labs(x = "Gata1 gene expression", y = "Gata1 motif accessibility (z-score)") +
      #     theme(legend.position = "none") +
      #     scale_color_manual(values = col)+
      #     labs(title = paste0(n), x = "gene expression", y = "gene activity score") +
      #     BAR_THEME
      #   print(plot)
      #   ggsave(paste0(plot_dir, "scatterPlot_", n, ".pdf"))
      #   print(plot)
      #})
    }

  #do.call(gridExtra::grid.arrange, c(plots, scatter_plots, ncol = 2, nrow = 2))
  # 

}

Plot gene expression, gene scores & motif z-scores:

```#{r, fig.width=10, fig.height=10}

for (n in marker_genes) { print(n) # select one row for a particular gene score_n <- gene_scores_mat[rownames(gene_scores_mat) %in% c(n), ] # add score for this gene to metadata metadata[paste0(“score_”,n)] <- score_n # select gene expression for a particular gene expr_n <- lognorm[rownames(lognorm) %in% c(n), ] metadata[paste0(“expr_”, n)] <- expr_n

seacells_n <- sea_mtx[rownames(sea_mtx) == tfs[grepl(paste0(“^”, n), tfs)], ] sea_meta[paste0(“seacell_”, n)] <- seacells_n

# if the marker gene is a TF if (length(tfs[grepl(paste0(“^”, n), tfs)]) > 0) {

  # select motif z score
  motif_n <- motif_mtx[rownames(motif_mtx) ==  tfs[grepl(paste0("^", n), tfs)], ]
  metadata[paste0("motif_", n)] <- motif_n
  
  # create barplots for gene scores, gene expression and motif z-score
  plots <- map(c("score_", "expr_", "motif_"), function(p){
    df <- metadata %>%
    group_by(celltypes) %>%
    summarise_at(vars(paste0(p, n)), funs(mean(., na.rm=TRUE)))
    df %>% ggplot() +
    geom_bar(aes(x = celltypes, y = df %>% pull(paste0(p, n)), 
                 fill = celltypes), stat = "identity") +
    scale_fill_manual(values = col) +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
          legend.position = "none") +
    labs(title = paste0(n), x = "celltype", y = paste0(p))
  })
  
  # create scatter plots for gene expression and motif z-score
  scatter_plots <- map(seq.int(1), function(s){
    df <- metadata %>% group_by(celltypes) %>% 
    summarise_at(vars(paste0("motif_", n), 
                      paste0("score_", n)), 
                 funs(mean(., na.rm = TRUE)))
    df %>%
    ggplot() +
    geom_smooth(aes(x = df %>% pull(paste0("score_", n)), 
                    y = df %>% pull(paste0("motif_", n))),
                formula = y ~ x, method = "lm", size = .1) +
    geom_point(aes(x = df %>% pull(paste0("score_", n)), 
                   y = df %>% pull(paste0("motif_", n)), 
                   col = celltypes, size = 1)) +
    #labs(x = "Gata1 gene expression", y = "Gata1 motif accessibility (z-score)") +
    theme(legend.position = "none") +
    scale_color_manual(values = col) +
    labs(title = paste0(n), x = "gene activity score", y = "TF-motif z-score")
    })
  
# if the marker gene is no TF
} else {
  
  # create barplots for gene scores, gene expression and motif z-score
  plots <- map(c("score_", "expr_"), function(p){
    df <- metadata %>%
    group_by(celltypes) %>%
    summarise_at(vars(paste0(p, n)), funs(mean(., na.rm=TRUE)))
    df %>% ggplot() +
    geom_bar(aes(x = celltypes, y = df %>% pull(paste0(p, n)), 
                 fill = celltypes), stat = "identity") +
    scale_fill_manual(values = col) +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
          legend.position = "none") +
    labs(title = paste0(n), x = "celltype", y = paste0(p))

  })
  
  # create scatter plots for gene expression and gene_score
  scatter_plots <- map(seq.int(1), function(s){
    df <- metadata %>% group_by(celltypes) %>%
    summarise_at(vars(paste0("score_", n),
                      paste0("expr_", n)),
                 funs(mean(., na.rm = TRUE))) 
    df %>%
    ggplot() +
    geom_smooth(aes(x = df %>% pull(paste0("expr_", n)), 
                    y = df %>% pull(paste0("score_", n))),
                formula = y ~ x, method = "lm", size = .1) +
    geom_point(aes(x = df %>% pull(paste0("expr_", n)),
                   y = df %>% pull(paste0("score_", n)),
                   col = celltypes, size = 1)) +
    #labs(x = "Gata1 gene expression", y = "Gata1 motif accessibility (z-score)") +
    theme(legend.position = "none") +
    scale_color_manual(values = col)+
    labs(title = paste0(n), x = "gene expression", y = "gene activity score")

  })
}

do.call(gridExtra::grid.arrange, c(plots, scatter_plots, ncol = 2, nrow = 2)) #

}




```#{r, fig.width=10, fig.height=10}
p1 <- metadata %>%
  group_by(celltypes) %>%
  summarise_at(vars(Gata1), funs(median(., na.rm=TRUE))) %>% ggplot() +
  geom_bar(aes(x = celltypes, y = Gata1, fill = celltypes), stat = "identity") +
  scale_fill_manual(values = col) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1), 
        legend.position = "none")

p2 <- metadata %>%
  group_by(celltypes) %>%
  summarise_at(vars(gata1_score_p2g), funs(mean(., na.rm=TRUE))) %>% ggplot() +
  geom_bar(aes(x = celltypes, y = gata1_score_p2g, fill = celltypes), stat = "identity") +
  scale_fill_manual(values = col) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1), 
        legend.position = "none")


p3 <- metadata %>% group_by(celltypes) %>% 
  summarise_at(vars(gata1_z_scores, Gata1), funs(mean(., na.rm = TRUE))) %>%
  ggplot() +
  geom_smooth(aes(x = Gata1, y = gata1_z_scores), formula = y ~ x, method = "lm", size = .1) +
  geom_point(aes(x = Gata1, y = gata1_z_scores, col = celltypes, size = 1)) +
  labs(x = "Gata1 gene expression", y = "Gata1 motif accessibility (z-score)") +
  theme(legend.position = "none") +
  scale_color_manual(values = col) 

p4 <- metadata %>%
  group_by(celltypes) %>%
  summarise_at(vars(gata1_z_scores), funs(mean(., na.rm=TRUE))) %>% ggplot() +
  geom_bar(aes(x = celltypes, y = gata1_z_scores, fill = celltypes), stat = "identity") +
  scale_fill_manual(values = col) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1), 
        legend.position = "none")

cowplot::plot_grid(p1, p2, p3, p4)

p1 <- metadata %>%
  group_by(celltypes) %>%
  summarise_at(vars(Sox9), funs(mean(., na.rm=TRUE))) %>% ggplot() +
  geom_bar(aes(x = celltypes, y = Sox9, fill = celltypes), stat = "identity") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
        legend.position = "none") +
  scale_fill_manual(values = col)



p2 <- metadata %>%
  group_by(celltypes) %>%
  summarise_at(vars(sox9_z_scores), funs(mean(., na.rm=TRUE))) %>% ggplot() +
  geom_bar(aes(x = celltypes, y = sox9_z_scores, fill = celltypes), stat = "identity") +
  scale_fill_manual(values = col) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1), 
        legend.position = "none")

p3 <- metadata %>%
  group_by(celltypes) %>%
  summarise_at(vars(sox9_score_p2g), funs(mean(., na.rm=TRUE))) %>% ggplot() +
  geom_bar(aes(x = celltypes, y = sox9_score_p2g, fill = celltypes), stat = "identity") +
  scale_fill_manual(values = col) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1), 
        legend.position = "none")


p4 <- metadata %>% group_by(celltypes) %>% 
  summarise_at(vars(sox9_z_scores, Sox9), funs(mean(., na.rm = TRUE))) %>%
  ggplot() +
  geom_smooth(aes(x = Sox9, y = sox9_z_scores), formula = y ~ x, method = "lm", size = .1) +
  geom_point(aes(x = Sox9, y = sox9_z_scores, col = celltypes, size = 1)) +
  labs(x = "Gata1 gene expression", y = "Gata1 motif accessibility (z-score)") +
  theme(legend.position = "none") +
  scale_color_manual(values = col) 

cowplot::plot_grid(p1, p2, p3, p4)
---
title: "14_Barplots"
output: 
  html_document:
    toc: true
    toc_depth: 5
    code_folding: hide
    toc_float: true
    code_download: true
    theme: cosmo
    highlight: textmate
---

<style>
body {
text-align: justify}
</style>

```{r setup, include=FALSE}
knitr::opts_chunk$set(cache = FALSE, autodep = TRUE, 
                      collapse = TRUE, message = FALSE)
knitr::opts_knit$set(root.dir = "/omics/groups/OE0533/internal/katharina/scDoRI/gastrulation_data/")
setwd("/omics/groups/OE0533/internal/katharina/scDoRI/gastrulation_data/")

set.seed(1)
```

  
```{r}
## Load libraries


suppressPackageStartupMessages({
library(ArchR)
library(rhdf5)
library(tidyverse)
library(reticulate)
library(zellkonverter)
library(Matrix)
library(dichromat)
library(Seurat)
#library(caret)
h5disableFileLocking()})
```

```#{r}
rna_seurat <- readRDS("Seurat_objects/rna_Seurat_object")
hvg <- VariableFeatures(rna_seurat)
```
```{r}
# directory where to save the figures
plot_dir <- "/omics/groups/OE0533/internal/katharina/scDoRI/gastrulation_data_new/Kathi/plots/report_chromvar/"
```




```{r}
# get gene expression matrix
proj <- loadArchRProject("12_Ricards_peaks_ChromVar")


# get the metadata 
metadata <- as.data.frame(getCellColData(proj))
```

### ChromVar Motif deviation scores

```{r}

# get motif matrix
motifs <- getMatrixFromProject(proj, useMatrix = "MotifMatrix")
motif_mtx <- assays(motifs)[[2]]
# remove index number from TFs
tfs <- gsub("_.*", "", rownames(motifs))
rownames(motif_mtx) <- tfs
```



### Gene activity scores


```{r}
gene_scores <- getMatrixFromProject(proj, useMatrix = "GeneScoreMatrix")
gene_scores_mat <- assays(gene_scores)[[1]]
rownames(gene_scores_mat) <- rowData(gene_scores)$name
colnames(gene_scores_mat) <- colnames(gene_scores)

```




### Deep Learning deviations

```{r}
proj1 <- loadArchRProject("/omics/groups/OE0533/internal/katharina/scDoRI/gastrulation_data_new/Kathi/06_deep_chromvar/")
# get motif matrix
deep_motifs <- getMatrixFromProject(proj1, useMatrix = "DeepLearningMotifs1")
deep_motif_mtx <- assays(deep_motifs)[[2]]

rownames(deep_motif_mtx) <- tolower(rownames(deep_motif_mtx))
substr(rownames(deep_motif_mtx), 1, 1) <- toupper(substr(rownames(deep_motif_mtx), 1, 1))


tfs <- rownames(deep_motif_mtx)
metadata <- colData(deep_motifs) %>% as.data.frame()

```





## Plotting Functions

```{r}
plot_score_per_celltypes <- function(tf, score_matrix, metadata_df, y_label){
  motif_n <- score_matrix[rownames(score_matrix) %in% tf, ]
  plot <- metadata_df %>% 
    mutate(!!tf := motif_n) %>%
    group_by(celltypes) %>%
    summarise(mean = mean(!!(sym(tf)))) %>%
    ggplot() +
    geom_bar(aes(x = celltypes, y = mean, fill = celltypes), stat = "identity") +
    scale_fill_manual(values = col) +
    labs(y = paste0(y_label),
         title = paste0(n)) +
    BAR_THEME
  
  return(plot)
}

scatterplot <- function(tf, score_matrix_x, score_matrix_y, metadata_df, x_label, y_label){
  motif_x <- score_matrix_x[rownames(score_matrix_x) %in% tf, ]
  motif_y <- score_matrix_y[rownames(score_matrix_y) %in% tf, ]
  plot <- metadata_df %>% 
    mutate(tf_x := motif_x, tf_y := motif_y) %>%
    group_by(celltypes) %>%
    summarise_at(vars(tf_x, tf_y), mean) %>% 
    ggplot() +
    geom_smooth(aes(x = tf_x, y = tf_y),
                formula = y ~ x, method = "lm", size =.1) +
    geom_point(aes(x = tf_x, y = tf_y, col = celltypes, size = 1)) +
    scale_color_manual(values = col) + 
    labs(title = paste0(tf), 
         x = paste0(x_label),
         y = paste0(y_label)) +
    SCATTER_THEME
    
  return(plot)
}

```


### Plot Themes

```{r}
BAR_THEME <- theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1, size = 10),
                   axis.text.y = element_text(size = 12),
                   axis.title.x = element_text(size = 15), 
                   axis.title.y = element_text(size = 15),
                   plot.title = element_text(hjust = 0.5, size = 20),
          legend.position = "none",
          panel.grid.major = element_line(colour = "grey"),   # Major grid lines
          panel.background = element_rect(fill = "white", colour = "black")) 

SCATTER_THEME <- theme(axis.text.x = element_text(size = 12),
                   axis.text.y = element_text(size = 12),
                   axis.title.x = element_text(size = 15), 
                   axis.title.y = element_text(size = 15),
                   plot.title = element_text(hjust = 0.5, size = 20),
          legend.position = "none",
          panel.grid.major = element_line(colour = "grey"),   # Major grid lines
          panel.background = element_rect(fill = "white", colour = "black")) 
```


### Color Palette:

```{r}
colPalette_celltypes = c('#532C8A',
 '#c19f70',
 '#f9decf',
 '#c9a997',
 '#B51D8D',
 '#3F84AA',
 '#9e6762',
 '#354E23',
 '#F397C0',
 '#ff891c',
 '#635547',
 '#C72228',
 '#f79083',
 '#EF4E22',
 '#989898',
 '#7F6874',
 '#8870ad',
 '#647a4f',
 '#EF5A9D',
 '#FBBE92',
 '#139992',
 '#cc7818',
 '#DFCDE4',
 '#8EC792',
 '#C594BF',
 '#C3C388',
 '#0F4A9C',
 '#FACB12',
 '#8DB5CE',
 '#1A1A1A',
 '#C9EBFB',
 '#DABE99',
 '#65A83E',
 '#005579',
 '#CDE088',
 '#f7f79e',
 '#F6BFCB')

celltypes <- (as.data.frame(getCellColData(proj)) %>% group_by(celltypes) %>% 
 summarise(n = n()))$celltypes

col <- setNames(colPalette_celltypes, celltypes)
```


Select a few marker genes and transcription factors:
```{r}
marker_genes <- c("Lamb1",  "Sparc", "Elf5", "Ascl2", "Tfap2c", "Ttr",
                  "Apoa2", "Apoe", "Cystm1", "Emb", "Spink1",  "Krt19",
                  "Dkk1", "Grhl3", "Trp63", "Grhl2",  "Pax6", "Pax2",
                  "En1", "Foxd3", "Tfap2a", "Pax3", "Sox9",
                  "Six3", "Hesx1", "Irx3", "Hoxb9", "Cdx4",
                  "Hes3", "Hba-a2", "Hba-a1",  "Hbb-bh1", "Gata1",
                   "Gata6",
                  #"Gata6", "Gata5",
                  "Cited4",
                   "Cdh5", "Pecam1", "Anxa5", "Etv2", "Igf2",
                  "Krt8", "Krt18", "Pmp22", "Ahnak", "Bmp4", "Tbx4", "Hoxa11",
                  "Hoxa10", "Tnnt2", "Myl4",  "Myl7", "Acta2",
                  "Smarcd3", "Tcf21", "Tbx6", "Dll1", "Aldh1a2", "Tcf15",
                  "Meox1", "Tbx1", "Gbx2", "Cdx1", "Hoxb1", "Hes7", "Osr1",
                  "Mesp2", "Lefty2", "Mesp1", "Cer1",  "Chrd", 
                  "Foxa2", "Pax7", "Fgf8", "Lhx1", "Mixl1", "Otx2", "Hhex",
                   "Ifitm3", "Nkx1-2", "Eomes", "Nanog", "Utf1",
                  "Epcam", "Pou5f1" )
#"Sox2"
#"Gata2"
#"Gata4"
#  "Gata5",
#"Gata6",
# "Gsc",
```

## Deep Learning motifs vs. cisbp motifs

```{r, fig.height=10, fig.width=15}

markers <- c("Elf5", "Pax2", "Pax6", "Pax3", "Pax7", "Hoxb9", "Cdx4", "Hoxa11", "Hoxa10", "Tcf15", 
  "Tbx1", "Tbx6", "Mesp2", "Mesp1", "Pouf51", "Gata1", "Gata2", "Gata3", "Gata4",  "Gata6", "Sox10",
  "Sox11" ,"Sox13","Sox15","Sox17"      
  ,"Sox2" , "Sox3", "Sox30", "Sox4", "Sox5", "Sox6", "Sox9", "Klf1", "Klf3", "Klf4", "Klf5", "Klf9" )

# select only markers present in all three matrices 
markers <- markers[markers %in% rownames(deep_motif_mtx)]
markers <- markers[markers %in% rownames(motif_mtx)]
markers <- markers[markers %in% rownames(gene_scores_mat)]



for (n in markers){
  ## BAR PLOTS
  score_plot <- plot_score_per_celltypes(n, gene_scores_mat, metadata,
                                         y_label = "Gene activity score")
  ggsave(paste0(plot_dir, "score", n, ".pdf"))
  
  
  motif_plot <- plot_score_per_celltypes(n, motif_mtx, metadata, 
                                         y_label = "ChromVar scores")
  ggsave(paste0(plot_dir, "motif", n, ".pdf"))
  
  
  deep_plot <- plot_score_per_celltypes(n, deep_motif_mtx, metadata,
                                        y_label = "Deep learning ChromVar scores")
  ggsave(paste0(plot_dir, "deep", n, ".pdf"), deep_plot)
  
  
  ## SCATTER PLOTS
  scatter_motif <- scatterplot(n, gene_scores_mat, motif_mtx, metadata, 
              x_label = "Gene activity scores",
              y_label = "ChromVar scores")
  ggsave(paste0(plot_dir, "scatter_motif", n, ".pdf"), scatter_motif)

  
  scatter_deep <- scatterplot(n, gene_scores_mat, deep_motif_mtx, metadata,
              x_label = "Gene activity scores",
              y_label = "Deep learning ChromVar scores")
  ggsave(paste0(plot_dir, "scatter_deep", n, ".pdf"), scatter_deep)


  ## Combine Plots
  print(cowplot::plot_grid(score_plot, motif_plot, deep_plot,
                           scatter_motif, scatter_deep, ncol = 3))
  
  
}


```

## ChromVar scores using cisbp

```{r, fig.height=10, fig.width=15}

markers <- c("Lamb1",  "Sparc", "Elf5", "Ascl2", "Tfap2c", "Ttr",
                  "Apoa2", "Apoe", "Cystm1", "Emb", "Spink1",  "Krt19",
                  "Dkk1", "Grhl3", "Trp63", "Grhl2",  "Pax6", "Pax2",
                  "En1", "Foxd3", "Tfap2a", "Pax3", "Sox9",
                  "Six3", "Hesx1", "Irx3", "Hoxb9", "Cdx4",
                  "Hes3", "Hba-a2", "Hba-a1",  "Hbb-bh1", "Gata1",
                   "Gata6",
                  #"Gata6", "Gata5",
                  "Cited4",
                   "Cdh5", "Pecam1", "Anxa5", "Etv2", "Igf2",
                  "Krt8", "Krt18", "Pmp22", "Ahnak", "Bmp4", "Tbx4", "Hoxa11",
                  "Hoxa10", "Tnnt2", "Myl4",  "Myl7", "Acta2",
                  "Smarcd3", "Tcf21", "Tbx6", "Dll1", "Aldh1a2", "Tcf15",
                  "Meox1", "Tbx1", "Gbx2", "Cdx1", "Hoxb1", "Hes7", "Osr1",
                  "Mesp2", "Lefty2", "Mesp1", "Cer1",  "Chrd", 
                  "Foxa2", "Pax7", "Fgf8", "Lhx1", "Mixl1", "Otx2", "Hhex",
                   "Ifitm3", "Nkx1-2", "Eomes", "Nanog", "Utf1",
                  "Epcam", "Pou5f1" )

# select only markers prsent in both matrices
markers <- markers[markers %in% rownames(motif_mtx)]
markers <- markers[markers %in% rownames(gene_scores_mat)]

for (n in markers){
  ## BAR PLOTS
  score_plot <- plot_score_per_celltypes(n, gene_scores_mat, metadata,
                                         y_label = "Gene activity score")

  
  motif_plot <- plot_score_per_celltypes(n, motif_mtx, metadata, 
                                         y_label = "ChromVar scores")

  
  
  ## SCATTER PLOTS
  scatter_motif <- scatterplot(n, gene_scores_mat, motif_mtx, metadata, 
              x_label = "Gene activity scores",
              y_label = "ChromVar scores")


  ## Combine Plots
  print(cowplot::plot_grid(score_plot, motif_plot,
                           scatter_motif, ncol = 2))
  
  
}
```




# GATA Factors

```#{r, fig.width=6, fig.height=6}
for (n in c("Gata1", "Gata2", "Gata3", "Gata4", "Gata5", "Gata6")) {
  print(n)
  # select one row for a particular gene
  score_n <- gene_scores_mat[rownames(gene_scores_mat) %in%  n, ]
  # add score for this gene to metadata
  metadata[paste0("score_",n)] <- score_n
  # select gene expression for a particular gene
  expr_n <- lognorm[rownames(lognorm) %in% c(n), ]
  metadata[paste0("expr_", n)] <- expr_n
  
  
  seacells_n <- sea_mtx[rownames(sea_mtx) == tfs[grepl(paste0("^", n), tfs)], ]
  sea_meta[paste0("seacell_", n)] <- seacells_n

  # select motif z score
  motif_n <- motif_mtx[rownames(motif_mtx) ==  tfs[grepl(paste0("^", n), tfs)], ]
  metadata[paste0("motif_", n)] <- motif_n
  
  # create barplots for gene scores, gene expression and motif z-score
  plots <- map(c("score_", "motif_"), function(p){
    df <- metadata %>%
    group_by(celltypes) %>%
    summarise_at(vars(paste0(p, n)), funs(mean(., na.rm=TRUE)))
    plot <- df %>% ggplot() +
    geom_bar(aes(x = celltypes, y = df %>% pull(paste0(p, n)), 
                 fill = celltypes), stat = "identity") +
    scale_fill_manual(values = col) +
    labs(title = paste0(n), x = "celltype", y = paste0(p)) +
    BAR_THEME
    print(plot)
    ggsave(paste0(plot_dir, p, n, ".pdf"))
    print(plot)
  })
  sea_plot <- map(seq.int(1), function(sea){
    plot <- sea_meta %>% 
    group_by(celltypes) %>%
    summarise(mean = mean(!!sym(paste0("seacell_",n)))) %>%
    ggplot() +
    geom_bar(aes(x = celltypes, y = mean, fill = celltypes), stat = "identity") +
    scale_fill_manual(values = col) +
    labs(title = paste0(n), y = "SEACell deviation score") +
    BAR_THEME
    print(plot)
    ggsave(paste0(plot_dir, "seacell_bar_", n, ".pdf"))
    print(plot)
    
  })
  # create scatter plots for gene expression and motif z-score
  scatter_plots <- map(seq.int(1), function(s){
    df <- metadata %>% group_by(celltypes) %>% 
    summarise_at(vars(paste0("motif_", n), 
                      paste0("score_", n)), 
                 funs(mean(., na.rm = TRUE)))
    plot <- df %>%
    ggplot() +
    geom_smooth(aes(x = df %>% pull(paste0("score_", n)), 
                    y = df %>% pull(paste0("motif_", n))),
                formula = y ~ x, method = "lm", size = .1) +
    geom_point(aes(x = df %>% pull(paste0("score_", n)), 
                   y = df %>% pull(paste0("motif_", n)), 
                   col = celltypes, size = 1)) +
    SCATTER_THEME +
    #labs(x = "Gata1 gene expression", y = "Gata1 motif accessibility (z-score)") +
    scale_color_manual(values = col) +
    labs(title = paste0(n), x = "gene activity score", y = "TF-motif z-score")
    print(plot)
    ggsave(paste0(plot_dir, "scatterPlot_", n, ".pdf"))
    print(plot)
    })
  
  do.call(gridExtra::grid.arrange, c(plots, sea_plot, scatter_plots, ncol = 2)) 
  #%>% ggsave(paste0(plot_dir, n, ".pdf"))

}
```

# Marker Genes

## SEACells


```#{r, fig.width=6, fig.height=6, eval = FALSE}

for (n in marker_genes) {
  print(n)
  # select one row for a particular gene
  score_n <- gene_scores_mat[rownames(gene_scores_mat) %in% c(n), ]
  # add score for this gene to metadata
  metadata[paste0("score_",n)] <- score_n
  # select gene expression for a particular gene
  expr_n <- lognorm[rownames(lognorm) %in% c(n), ]
  metadata[paste0("expr_", n)] <- expr_n
  
  
  seacells_n <- sea_mtx[rownames(sea_mtx) == tfs[grepl(paste0("^", n), tfs)], ]
  sea_meta[paste0("seacell_", n)] <- seacells_n

  # if the marker gene is a TF
  if (length(tfs[grepl(paste0("^", n), tfs)]) > 0) {
    
      # select motif z score
      motif_n <- motif_mtx[rownames(motif_mtx) ==  tfs[grepl(paste0("^", n), tfs)], ]
      metadata[paste0("motif_", n)] <- motif_n
      
      # create barplots for gene scores, gene expression and motif z-score
      plot <- map(c("score_", "motif_"), function(p){
        df <- metadata %>%
          group_by(celltypes) %>%
          summarise_at(vars(paste0(p, n)), funs(mean(., na.rm=TRUE)))
          plot <- df %>% ggplot() +
          geom_bar(aes(x = celltypes, y = df %>% pull(paste0(p, n)), 
                       fill = celltypes), stat = "identity") +
          scale_fill_manual(values = col) +
          theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
                legend.position = "none") +
          labs(title = paste0(n), x = "celltype", y = paste0(p)) +
          BAR_THEME
        print(plot)
        ggsave(paste0(plot_dir, p, n, ".pdf"))
        print(plot)
      })
      
      # create scatter plots for gene expression and motif z-score
      scatter_plots <- map(seq.int(1), function(s){
        df <- metadata %>% group_by(celltypes) %>% 
        summarise_at(vars(paste0("motif_", n), 
                          paste0("score_", n)), 
                     funs(mean(., na.rm = TRUE)))
        plot <- df %>%
          ggplot() +
          geom_smooth(aes(x = df %>% pull(paste0("score_", n)), 
                          y = df %>% pull(paste0("motif_", n))),
                      formula = y ~ x, method = "lm", size = .1) +
          geom_point(aes(x = df %>% pull(paste0("score_", n)), 
                         y = df %>% pull(paste0("motif_", n)), 
                         col = celltypes, size = 1)) +
          #labs(x = "Gata1 gene expression", y = "Gata1 motif accessibility (z-score)") +
          theme(legend.position = "none") +
          scale_color_manual(values = col) +
          labs(title = paste0(n), x = "gene activity score", y = "TF-motif z-score") +
          BAR_THEME
        print(plot)
        ggsave(paste0(plot_dir, "scatterPlot_", n, ".pdf"))
        print(plot)
      })
        sea_plot <- map(seq.int(1), function(sea){
          plot <- sea_meta %>% 
            group_by(celltypes) %>%
            summarise(mean = mean(!!sym(paste0("seacell_",n)))) %>%
            ggplot() +
            geom_bar(aes(x = celltypes, y = mean, fill = celltypes), stat = "identity") +
            scale_fill_manual(values = col) +
            theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1), legend.position = "None") +#%>% print()
            labs(title = paste0(n), y = "SEACell deviation score") +
            BAR_THEME
          print(plot)
          ggsave(paste0(plot_dir, "seacell_bar_", n, ".pdf"))
          print(plot)
          
    })
      
    # if the marker gene is no TF
    } else {
      print("no")
      
      # # create barplots for gene scores, gene expression and motif z-score
      # plots <- map(c("score_"), function(p){
      #   df <- metadata %>%
      #     group_by(celltypes) %>%
      #     summarise_at(vars(paste0(p, n)), funs(mean(., na.rm=TRUE)))
      #     plot <- df %>% ggplot() +
      #     geom_bar(aes(x = celltypes, y = df %>% pull(paste0(p, n)), 
      #                  fill = celltypes), stat = "identity") +
      #     scale_fill_manual(values = col) +
      #     theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
      #           legend.position = "none") +
      #     labs(title = paste0(n), x = "celltype", y = paste0(p)) +
      #     BAR_THEME
      #   print(plot)
      #   ggsave(paste0(plot_dir, p, n, ".pdf"))
      #   print(plot)
      # 
      # })
      # 
      # # create scatter plots for gene expression and gene_score
      # scatter_plots <- map(seq.int(1), function(s){
      #   df <- metadata %>% group_by(celltypes) %>%
      #   summarise_at(vars(paste0("score_", n),
      #                     paste0("expr_", n)),
      #                funs(mean(., na.rm = TRUE)))
      #   plot <- df %>%
      #     ggplot() +
      #     geom_smooth(aes(x = df %>% pull(paste0("expr_", n)),
      #                     y = df %>% pull(paste0("score_", n))),
      #                 formula = y ~ x, method = "lm", size = .1) +
      #     geom_point(aes(x = df %>% pull(paste0("expr_", n)),
      #                    y = df %>% pull(paste0("score_", n)),
      #                    col = celltypes, size = 1)) +
      #     #labs(x = "Gata1 gene expression", y = "Gata1 motif accessibility (z-score)") +
      #     theme(legend.position = "none") +
      #     scale_color_manual(values = col)+
      #     labs(title = paste0(n), x = "gene expression", y = "gene activity score") +
      #     BAR_THEME
      #   print(plot)
      #   ggsave(paste0(plot_dir, "scatterPlot_", n, ".pdf"))
      #   print(plot)
      #})
    }

  #do.call(gridExtra::grid.arrange, c(plots, scatter_plots, ncol = 2, nrow = 2))
  # 

}
```


# Deep learning motifs


```{r}
proj1 <- loadArchRProject("/omics/groups/OE0533/internal/katharina/scDoRI/gastrulation_data_new/Kathi/06_deep_chromvar/")

deep <- getMatrixFromProject(proj1, useMatrix = "DeepLearningMotifs1")
deep_matrix <- assays(deep)[[2]]

deep_matrix <- deep_matrix[, colnames(motif_mtx)]

stopifnot(all(colnames(motif_mtx) == colnames(deep_matrix)))
#test <- rownames(deep_matrix)
rownames(deep_matrix) <- tolower(rownames(deep_matrix))
substr(rownames(deep_matrix), 1, 1) <- toupper(substr(rownames(deep_matrix), 1, 1))

```


# TFs of interest

```#{r}
markers <- c("Elf5", "Pax2", "Pax6", "Pax3", "Pax7", "Hoxb9", "Cdx4", "Hoxa11", "Hoxa10", "Tcf15", 
  "Tbx1", "Tbx6", "Mesp2", "Mesp1", "Pouf51", "Gata1", "Gata2", "Gata3", "Gata4",  "Gata6", "Sox10",
  "Sox11" ,"Sox13","Sox15","Sox17"      
  ,"Sox2" , "Sox3", "Sox30", "Sox4", "Sox5", "Sox6", "Sox9", "Klf1", "Klf3", "Klf4", "Klf5", "Klf9" )

markers <- markers[markers %in% rownames(deep_matrix)]

markers <- markers[markers %in% rownames(gene_scores_mat)]
markers
```




```#{r, fig.width=6, fig.height=6}
plot_dir <-  "/omics/groups/OE0533/internal/katharina/scDoRI/gastrulation_data_new/Kathi/plots/deep_ChromVar/"

for (n in markers) {
  print(n)
  # select one row for a particular gene
  score_n <- gene_scores_mat[rownames(gene_scores_mat) %in%  n, ]
  # add score for this gene to metadata
  metadata[paste0("score_",n)] <- score_n
  # select gene expression for a particular gene
  # expr_n <- lognorm[rownames(lognorm) %in% c(n), ]
  # metadata[paste0("expr_", n)] <- expr_n
  
  
  # seacells_n <- sea_mtx[rownames(sea_mtx) == tfs[grepl(paste0("^", n), tfs)], ]
  # sea_meta[paste0("seacell_", n)] <- seacells_n

  # select motif z score
  deep_n <- deep_matrix[rownames(deep_matrix) %in% n, ]
  metadata[paste0("deep_", n)] <- deep_n
  
  # create barplots for gene scores, gene expression and motif z-score
  plots <- map(c("score_", "deep_"), function(p){
    df <- metadata %>%
    group_by(celltypes) %>%
    summarise_at(vars(paste0(p, n)), funs(mean(., na.rm=TRUE)))
    plot <- df %>% ggplot() +
    geom_bar(aes(x = celltypes, y = df %>% pull(paste0(p, n)), 
                 fill = celltypes), stat = "identity") +
    scale_fill_manual(values = col) +
    labs(title = paste0(n), x = "celltype", y = paste0(p)) +
    BAR_THEME
    print(plot)
    ggsave(paste0(plot_dir,  p, n, ".pdf"))
    print(plot)
  })
    scatter_plots <- map(seq.int(1), function(s){
    df <- metadata %>% group_by(celltypes) %>% 
    summarise_at(vars(paste0("deep_", n), 
                      paste0("score_", n)), 
                 funs(mean(., na.rm = TRUE)))
    plot <- df %>%
    ggplot() +
    geom_smooth(aes(x = df %>% pull(paste0("score_", n)), 
                    y = df %>% pull(paste0("deep_", n))),
                formula = y ~ x, method = "lm", size = .1) +
    geom_point(aes(x = df %>% pull(paste0("score_", n)), 
                   y = df %>% pull(paste0("deep_", n)), 
                   col = celltypes, size = 1)) +
    SCATTER_THEME +
    #labs(x = "Gata1 gene expression", y = "Gata1 motif accessibility (z-score)") +
    scale_color_manual(values = col) +
    labs(title = paste0(n), x = "gene activity score", y = "TF-motif z-score (deep learning)")
    print(plot)
    ggsave(paste0(plot_dir, "scatterPlot_", n, ".pdf"))
    print(plot)
    })
  
  
  
  do.call(gridExtra::grid.arrange, c(plots, sea_plot, scatter_plots, ncol = 2)) 
  #%>% ggsave(paste0(plot_dir, n, ".pdf"))

}
```


```#{r}
for (n in marker_genes) {
  print(n)
  # select one row for a particular gene
  score_n <- gene_scores_mat[rownames(gene_scores_mat) %in% c(n), ]
  # add score for this gene to metadata
  metadata[paste0("score_",n)] <- score_n
  # select gene expression for a particular gene
  expr_n <- lognorm[rownames(lognorm) %in% c(n), ]
  metadata[paste0("expr_", n)] <- expr_n
  
  
  seacells_n <- sea_mtx[rownames(sea_mtx) == tfs[grepl(paste0("^", n), tfs)], ]
  sea_meta[paste0("seacell_", n)] <- seacells_n

  # if the marker gene is a TF
  if (length(tfs[grepl(paste0("^", n), tfs)]) > 0) {
    
      # select motif z score
      motif_n <- motif_mtx[rownames(motif_mtx) ==  tfs[grepl(paste0("^", n), tfs)], ]
      metadata[paste0("motif_", n)] <- motif_n
      
      # create barplots for gene scores, gene expression and motif z-score
      plot <- map(c("score_", "motif_"), function(p){
        df <- metadata %>%
          group_by(celltypes) %>%
          summarise_at(vars(paste0(p, n)), funs(mean(., na.rm=TRUE)))
          plot <- df %>% ggplot() +
          geom_bar(aes(x = celltypes, y = df %>% pull(paste0(p, n)), 
                       fill = celltypes), stat = "identity") +
          scale_fill_manual(values = col) +
          theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
                legend.position = "none") +
          labs(title = paste0(n), x = "celltype", y = paste0(p)) +
          BAR_THEME
        print(plot)
        ggsave(paste0(plot_dir, p, n, ".pdf"))
        print(plot)
      })
      
      # create scatter plots for gene expression and motif z-score
      scatter_plots <- map(seq.int(1), function(s){
        df <- metadata %>% group_by(celltypes) %>% 
        summarise_at(vars(paste0("motif_", n), 
                          paste0("score_", n)), 
                     funs(mean(., na.rm = TRUE)))
        plot <- df %>%
          ggplot() +
          geom_smooth(aes(x = df %>% pull(paste0("score_", n)), 
                          y = df %>% pull(paste0("motif_", n))),
                      formula = y ~ x, method = "lm", size = .1) +
          geom_point(aes(x = df %>% pull(paste0("score_", n)), 
                         y = df %>% pull(paste0("motif_", n)), 
                         col = celltypes, size = 1)) +
          #labs(x = "Gata1 gene expression", y = "Gata1 motif accessibility (z-score)") +
          theme(legend.position = "none") +
          scale_color_manual(values = col) +
          labs(title = paste0(n), x = "gene activity score", y = "TF-motif z-score") +
          BAR_THEME
        print(plot)
        ggsave(paste0(plot_dir, "scatterPlot_", n, ".pdf"))
        print(plot)
      })
        sea_plot <- map(seq.int(1), function(sea){
          plot <- sea_meta %>% 
            group_by(celltypes) %>%
            summarise(mean = mean(!!sym(paste0("seacell_",n)))) %>%
            ggplot() +
            geom_bar(aes(x = celltypes, y = mean, fill = celltypes), stat = "identity") +
            scale_fill_manual(values = col) +
            theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1), legend.position = "None") +#%>% print()
            labs(title = paste0(n), y = "SEACell deviation score") +
            BAR_THEME
          print(plot)
          ggsave(paste0(plot_dir, "seacell_bar_", n, ".pdf"))
          print(plot)
          
    })
      
    # if the marker gene is no TF
    } else {
      print("no")
      
      # # create barplots for gene scores, gene expression and motif z-score
      # plots <- map(c("score_"), function(p){
      #   df <- metadata %>%
      #     group_by(celltypes) %>%
      #     summarise_at(vars(paste0(p, n)), funs(mean(., na.rm=TRUE)))
      #     plot <- df %>% ggplot() +
      #     geom_bar(aes(x = celltypes, y = df %>% pull(paste0(p, n)), 
      #                  fill = celltypes), stat = "identity") +
      #     scale_fill_manual(values = col) +
      #     theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
      #           legend.position = "none") +
      #     labs(title = paste0(n), x = "celltype", y = paste0(p)) +
      #     BAR_THEME
      #   print(plot)
      #   ggsave(paste0(plot_dir, p, n, ".pdf"))
      #   print(plot)
      # 
      # })
      # 
      # # create scatter plots for gene expression and gene_score
      # scatter_plots <- map(seq.int(1), function(s){
      #   df <- metadata %>% group_by(celltypes) %>%
      #   summarise_at(vars(paste0("score_", n),
      #                     paste0("expr_", n)),
      #                funs(mean(., na.rm = TRUE)))
      #   plot <- df %>%
      #     ggplot() +
      #     geom_smooth(aes(x = df %>% pull(paste0("expr_", n)),
      #                     y = df %>% pull(paste0("score_", n))),
      #                 formula = y ~ x, method = "lm", size = .1) +
      #     geom_point(aes(x = df %>% pull(paste0("expr_", n)),
      #                    y = df %>% pull(paste0("score_", n)),
      #                    col = celltypes, size = 1)) +
      #     #labs(x = "Gata1 gene expression", y = "Gata1 motif accessibility (z-score)") +
      #     theme(legend.position = "none") +
      #     scale_color_manual(values = col)+
      #     labs(title = paste0(n), x = "gene expression", y = "gene activity score") +
      #     BAR_THEME
      #   print(plot)
      #   ggsave(paste0(plot_dir, "scatterPlot_", n, ".pdf"))
      #   print(plot)
      #})
    }

  #do.call(gridExtra::grid.arrange, c(plots, scatter_plots, ncol = 2, nrow = 2))
  # 

}
```


Plot gene expression, gene scores & motif z-scores:

```#{r, fig.width=10, fig.height=10}

for (n in marker_genes) {
  print(n)
  # select one row for a particular gene
  score_n <- gene_scores_mat[rownames(gene_scores_mat) %in% c(n), ]
  # add score for this gene to metadata
  metadata[paste0("score_",n)] <- score_n
  # select gene expression for a particular gene
  expr_n <- lognorm[rownames(lognorm) %in% c(n), ]
  metadata[paste0("expr_", n)] <- expr_n
  
  
  seacells_n <- sea_mtx[rownames(sea_mtx) == tfs[grepl(paste0("^", n), tfs)], ]
  sea_meta[paste0("seacell_", n)] <- seacells_n

  # if the marker gene is a TF
  if (length(tfs[grepl(paste0("^", n), tfs)]) > 0) {
    
      # select motif z score
      motif_n <- motif_mtx[rownames(motif_mtx) ==  tfs[grepl(paste0("^", n), tfs)], ]
      metadata[paste0("motif_", n)] <- motif_n
      
      # create barplots for gene scores, gene expression and motif z-score
      plots <- map(c("score_", "expr_", "motif_"), function(p){
        df <- metadata %>%
        group_by(celltypes) %>%
        summarise_at(vars(paste0(p, n)), funs(mean(., na.rm=TRUE)))
        df %>% ggplot() +
        geom_bar(aes(x = celltypes, y = df %>% pull(paste0(p, n)), 
                     fill = celltypes), stat = "identity") +
        scale_fill_manual(values = col) +
        theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
              legend.position = "none") +
        labs(title = paste0(n), x = "celltype", y = paste0(p))
      })
      
      # create scatter plots for gene expression and motif z-score
      scatter_plots <- map(seq.int(1), function(s){
        df <- metadata %>% group_by(celltypes) %>% 
        summarise_at(vars(paste0("motif_", n), 
                          paste0("score_", n)), 
                     funs(mean(., na.rm = TRUE)))
        df %>%
        ggplot() +
        geom_smooth(aes(x = df %>% pull(paste0("score_", n)), 
                        y = df %>% pull(paste0("motif_", n))),
                    formula = y ~ x, method = "lm", size = .1) +
        geom_point(aes(x = df %>% pull(paste0("score_", n)), 
                       y = df %>% pull(paste0("motif_", n)), 
                       col = celltypes, size = 1)) +
        #labs(x = "Gata1 gene expression", y = "Gata1 motif accessibility (z-score)") +
        theme(legend.position = "none") +
        scale_color_manual(values = col) +
        labs(title = paste0(n), x = "gene activity score", y = "TF-motif z-score")
        })
      
    # if the marker gene is no TF
    } else {
      
      # create barplots for gene scores, gene expression and motif z-score
      plots <- map(c("score_", "expr_"), function(p){
        df <- metadata %>%
        group_by(celltypes) %>%
        summarise_at(vars(paste0(p, n)), funs(mean(., na.rm=TRUE)))
        df %>% ggplot() +
        geom_bar(aes(x = celltypes, y = df %>% pull(paste0(p, n)), 
                     fill = celltypes), stat = "identity") +
        scale_fill_manual(values = col) +
        theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
              legend.position = "none") +
        labs(title = paste0(n), x = "celltype", y = paste0(p))

      })
      
      # create scatter plots for gene expression and gene_score
      scatter_plots <- map(seq.int(1), function(s){
        df <- metadata %>% group_by(celltypes) %>%
        summarise_at(vars(paste0("score_", n),
                          paste0("expr_", n)),
                     funs(mean(., na.rm = TRUE))) 
        df %>%
        ggplot() +
        geom_smooth(aes(x = df %>% pull(paste0("expr_", n)), 
                        y = df %>% pull(paste0("score_", n))),
                    formula = y ~ x, method = "lm", size = .1) +
        geom_point(aes(x = df %>% pull(paste0("expr_", n)),
                       y = df %>% pull(paste0("score_", n)),
                       col = celltypes, size = 1)) +
        #labs(x = "Gata1 gene expression", y = "Gata1 motif accessibility (z-score)") +
        theme(legend.position = "none") +
        scale_color_manual(values = col)+
        labs(title = paste0(n), x = "gene expression", y = "gene activity score")

      })
    }

  do.call(gridExtra::grid.arrange, c(plots, scatter_plots, ncol = 2, nrow = 2))
  # 

}
```



```#{r, fig.width=10, fig.height=10}
p1 <- metadata %>%
  group_by(celltypes) %>%
  summarise_at(vars(Gata1), funs(median(., na.rm=TRUE))) %>% ggplot() +
  geom_bar(aes(x = celltypes, y = Gata1, fill = celltypes), stat = "identity") +
  scale_fill_manual(values = col) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1), 
        legend.position = "none")

p2 <- metadata %>%
  group_by(celltypes) %>%
  summarise_at(vars(gata1_score_p2g), funs(mean(., na.rm=TRUE))) %>% ggplot() +
  geom_bar(aes(x = celltypes, y = gata1_score_p2g, fill = celltypes), stat = "identity") +
  scale_fill_manual(values = col) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1), 
        legend.position = "none")


p3 <- metadata %>% group_by(celltypes) %>% 
  summarise_at(vars(gata1_z_scores, Gata1), funs(mean(., na.rm = TRUE))) %>%
  ggplot() +
  geom_smooth(aes(x = Gata1, y = gata1_z_scores), formula = y ~ x, method = "lm", size = .1) +
  geom_point(aes(x = Gata1, y = gata1_z_scores, col = celltypes, size = 1)) +
  labs(x = "Gata1 gene expression", y = "Gata1 motif accessibility (z-score)") +
  theme(legend.position = "none") +
  scale_color_manual(values = col) 

p4 <- metadata %>%
  group_by(celltypes) %>%
  summarise_at(vars(gata1_z_scores), funs(mean(., na.rm=TRUE))) %>% ggplot() +
  geom_bar(aes(x = celltypes, y = gata1_z_scores, fill = celltypes), stat = "identity") +
  scale_fill_manual(values = col) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1), 
        legend.position = "none")

cowplot::plot_grid(p1, p2, p3, p4)

p1 <- metadata %>%
  group_by(celltypes) %>%
  summarise_at(vars(Sox9), funs(mean(., na.rm=TRUE))) %>% ggplot() +
  geom_bar(aes(x = celltypes, y = Sox9, fill = celltypes), stat = "identity") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
        legend.position = "none") +
  scale_fill_manual(values = col)



p2 <- metadata %>%
  group_by(celltypes) %>%
  summarise_at(vars(sox9_z_scores), funs(mean(., na.rm=TRUE))) %>% ggplot() +
  geom_bar(aes(x = celltypes, y = sox9_z_scores, fill = celltypes), stat = "identity") +
  scale_fill_manual(values = col) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1), 
        legend.position = "none")

p3 <- metadata %>%
  group_by(celltypes) %>%
  summarise_at(vars(sox9_score_p2g), funs(mean(., na.rm=TRUE))) %>% ggplot() +
  geom_bar(aes(x = celltypes, y = sox9_score_p2g, fill = celltypes), stat = "identity") +
  scale_fill_manual(values = col) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1), 
        legend.position = "none")


p4 <- metadata %>% group_by(celltypes) %>% 
  summarise_at(vars(sox9_z_scores, Sox9), funs(mean(., na.rm = TRUE))) %>%
  ggplot() +
  geom_smooth(aes(x = Sox9, y = sox9_z_scores), formula = y ~ x, method = "lm", size = .1) +
  geom_point(aes(x = Sox9, y = sox9_z_scores, col = celltypes, size = 1)) +
  labs(x = "Gata1 gene expression", y = "Gata1 motif accessibility (z-score)") +
  theme(legend.position = "none") +
  scale_color_manual(values = col) 

cowplot::plot_grid(p1, p2, p3, p4)

```






